import json
from openai import OpenAI
from tqdm import tqdm
# Shared OpenAI client used by get_response() and rephrase().
# NOTE(review): api_key and organization are blank here (presumably redacted);
# they have to be filled in before either helper can reach the API.
client = OpenAI(api_key="", organization="")

# Problem records, one dict per problem; problem2prompt() reads their "query",
# "gold" and "tests" keys.  JSON is UTF-8 text, so the encoding is pinned
# instead of relying on the platform default.
with open("data/python/mbpp/ic_mbpp.json", encoding="utf-8") as f:
    problem_dicts = json.load(f)

def problem2prompt(problem_dict: dict) -> str:
    """Turn one problem record into the natural-language prompt sent to the model.

    Args:
        problem_dict: Record with a ``"query"`` string.  When it also has a
            ``"tests"`` key, its ``"gold"`` solution is scanned for top-level
            ``def`` lines and the first test (if any) is quoted in the prompt.
            Records without ``"tests"`` are treated as bash problems.

    Returns:
        The query followed by either the required function signature(s) and
        an optional test, or the bash-command instruction.
    """
    def add_test_to_prompt(query, function_names, test=None):
        # Append the signature requirement and, when available, one test case.
        problem_prompt = query + f" Your response should have the following function signature(s): {','.join(function_names)}. "
        if test is not None:
            problem_prompt += f"Additionally, your response should pass the following test: {test}."
        return problem_prompt

    def signature_of(line):
        # Cut at the first colon that is outside any bracket, so annotated
        # parameters (``x: int``) and return types stay part of the signature.
        depth = 0
        for pos, ch in enumerate(line):
            if ch in "([{":
                depth += 1
            elif ch in ")]}":
                depth -= 1
            elif ch == ":" and depth == 0:
                return line[:pos]
        # No terminating colon on this line (e.g. a multi-line signature).
        return line

    def extract_func_names_from_snippet(s):
        # Only unindented ``def`` lines count; nested functions and methods
        # are deliberately ignored, as before.
        return [signature_of(line) for line in s.splitlines() if line.startswith("def ")]

    if 'tests' in problem_dict:
        # The gold solution is only needed on this branch, so bash records
        # are not required to carry a "gold" key.
        function_names = extract_func_names_from_snippet(problem_dict['gold'])
        tests = problem_dict['tests']
        test = tests[0] if tests else None
        return add_test_to_prompt(problem_dict["query"], function_names, test)
    return problem_dict["query"] + "  Your response should be a bash command."

def get_response(user_prompt):
    """Ask the chat model to restate ``user_prompt`` in a child's tone.

    The fixed rewriting instruction is prepended to ``user_prompt`` and sent as
    a single user message; the text of the first returned choice is returned.
    """
    instruction = "Rewrite the following request in a child's tone. Do not include information that is not included in the request. Don't use questions.\n\n"
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": instruction + user_prompt},
    ]
    completion = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=chat_messages,
        temperature=0.7,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

def rephrase(user_prompt):
    """Ask the chat model for a more varied rewording of ``user_prompt``.

    The fixed rephrasing instruction is prepended to ``user_prompt`` and sent
    as a single user message; the text of the first returned choice is
    returned.
    """
    instruction = "Rephrase the following sentences. Add or change some details to make the sentences more diverse. Do not include extra words other than the rephrased sentences.\n\n"
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": instruction + user_prompt},
    ]
    completion = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=chat_messages,
        temperature=0.7,
        top_p=0.8,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# Accumulates every generated / collected text before it is written out.
res = []
# for problem_dict in tqdm(problem_dicts):
#     user_prompt = problem2prompt(problem_dict)
#     # print(user_prompt)
#     # print(get_response(user_prompt))
#     child_prompt = get_response(user_prompt)
#     res.append(child_prompt)
# save_path_name = "./gpt4-ft/child.jsonl" 
# with open(save_path_name, "w") as f:
#     for messages in res:
#         prompt = {"query": messages}
#         f.write(json.dumps(prompt) + "\n")

# Size of each expertise group; the commented-out writer for intro.jsonl
# slices `res` at this index to label non-expert vs. expert entries.
generate_num = 40
save_path_name = "./gpt4-ft/intro.jsonl"
# Instruction asking the model for ten self-introductions by a non-expert.
# Written as adjacent literals so every sentence boundary keeps its space
# (the previous backslash-continued form fused "problem." and "Generate").
prompt_non_expert = (
    "You are going to ask a stranger to help you write Python code to solve a problem. "
    "You are not majoring in computer science and do not have much knowledge in coding. "
    "Write a brief introduction of your background before asking for help to help others better understand your situation, ideally consisting of less than four sentences. "
    "You can imagine other information such as your major and age. Only describe your background and do not include any information on a specific coding problem. "
    "Generate 10 samples and make sentence patterns and word usage diverse."
)

# Forty newline-separated self-introductions written from a non-expert's point
# of view, one per line.  The text is split into a list further down; entries
# 20-39 of that list are then overwritten with claude_none_expert, so only the
# first twenty lines here reach `res`.
# NOTE(review): presumably collected from model responses to prompt_non_expert
# -- confirm the source.
answers_string = """
Hello, I'm a 22-year-old English Literature major with very little experience in programming. I'm currently facing a challenge that requires some Python coding.
Hi! I'm majoring in Art History and am 25 years old. I've never really dealt with computers beyond basic tasks and now I find myself needing some help with Python.
Greetings! I'm a third-year Psychology student and all things tech are a bit foreign to me. Could use some guidance with a Python project.
Hi there, I'm a 20-year-old student in Environmental Science. I haven't had any coding classes, but I need to tackle a Python-related task.
Hello! I'm a 24-year-old Political Science major. I'm not well-versed in coding but need some Python help for a project.
I'm a 21-year-old Biology student with minimal exposure to programming. I find myself in need of some Python assistance.
Hi, I'm studying Sociology and at 23, my experience with coding is practically non-existent. I could really use some help with Python.
Hey, I'm a 19-year-old Music major. Coding is quite out of my league, but I need to learn some Python for a class project.
Hello, I'm in my fourth year of studying Philosophy, age 26, with no background in coding. I need some help with Python.
Hi there! I'm a 22-year-old Geography student and coding is not my forte. However, I need some Python help.
Greetings, I'm a 24-year-old Theatre major with no coding experience. I need to use Python for a current project.
Hi, I'm a 23-year-old Journalism student and coding is unfamiliar territory for me. I need some help with Python.
Hello! At 21, studying Anthropology, I find myself needing to understand Python, which is quite outside my usual studies.
Hi, I'm a senior in Urban Planning, 22 years old, with no coding background. I'm looking for Python coding assistance.
I'm a 20-year-old Nursing student with zero coding experience. I need some help with Python for a class requirement.
Hey there, I'm a 21-year-old student in Veterinary Science. I've never done coding before but need some Python help.
Hello, I'm a 25-year-old International Relations major. Coding is not my area, but I need some Python assistance.
Hi, I'm a 22-year-old student specializing in Media Studies. I have no experience with programming but need Python help.
Greetings! I'm a 23-year-old student in Public Health, and coding is quite new to me. I need some Python guidance.
Hi, I'm a 24-year-old student in Environmental Studies, and I've never coded before. I need Python help for my thesis.
Hello, I'm a 21-year-old History major and coding is not in my skill set. I'm looking for Python assistance.
Hi there! I'm a 26-year-old student in Gender Studies, and I've never handled coding. I need some help with Python.
Greetings, I'm a 20-year-old Business major with no experience in programming. I need to learn some Python basics.
Hi, I'm a 22-year-old Economics student. I've never worked with Python before and could use some help now.
Hey, I'm a 23-year-old student in Marine Biology, and I'm not familiar with any programming. I need Python help.
Hello, I'm 25 and studying Fine Arts. I find myself needing to work with Python, which is outside my expertise.
Hi, I'm a 19-year-old Communications major. I'm completely new to programming but need to use Python for a project.
Greetings! I'm a 22-year-old student in Kinesiology and have no coding background. I need some help with Python.
Hi, I'm a 24-year-old student focusing on Sports Management, and I've never coded before. Need some Python tips.
Hello, I'm a 21-year-old student in Nutrition Science. I've never learned coding and now need some Python help.
Greetings, I'm a 20-year-old Archaeology student and programming is unfamiliar to me. Could use Python help.
Hi there! I'm a 23-year-old Linguistics major. Coding is not my strong suit but I need to use Python for research.
Hello, I'm a 25-year-old student majoring in Dance. I have no experience with coding but need to learn Python for a project.
Hi, I'm a 22-year-old student in Veterinary Medicine. I've never coded but now find myself needing Python assistance.
Greetings! I'm a 24-year-old Forestry student and coding is not my area of expertise. I need some help with Python.
Hello, I'm a 23-year-old student in Astrophysics. Coding, particularly Python, is something I need help with.
Hi, I'm a 21-year-old student in Graphic Design. I have no coding experience and need some help with Python.
Greetings, I'm a 24-year-old Philosophy student. Coding is new to me but I need to master some Python basics.
Hi there! I'm a 19-year-old student in Hospitality Management. I've never worked with Python and need some guidance.
Hello, I'm a 22-year-old student in Aerospace Engineering with little coding knowledge. I need Python help for my coursework.
"""
# Twenty non-expert self-introductions.  They replace entries 20-39 of the
# list obtained by splitting answers_string.
# NOTE(review): the name suggests these were produced by Claude -- confirm.
claude_none_expert = [
    "Hi there! I'm a sophomore studying biology and have only taken one intro to programming course. I'm pretty new to coding but excited to learn more. Would you be able to help me with a Python problem?",
    "Hey! I'm majoring in economics and have dabbled a bit in data analysis using Excel. I thought it would be useful to pick up some Python skills for my future career. Do you have a few minutes to assist a beginner like me with a coding issue?",
    "Hello! As a senior English literature student, I never thought I'd find myself venturing into the world of programming. However, I recently discovered the power of Python for text analysis and I'm hooked! I'm hoping you might be able to lend a hand with a tricky bit of code.",
    "Greetings! I'm a first-year psychology student and I've been trying to teach myself Python in my spare time. It's been a bit of a steep learning curve, but I'm determined to make progress. If you have a moment, I'd really appreciate some guidance on a programming problem.",
    "Hi! I'm a junior studying environmental science and I'm working on a project that involves a fair amount of data processing. I've heard that Python is a great tool for this, but I'm still very much a novice. Could you possibly help me out with a coding conundrum?",
    "Hey there! As a freshman in the business school, I've quickly realized that having some coding skills under my belt could be a real asset. I've started learning Python, but I've run into a roadblock. Would you be willing to help a newbie like me?",
    "Hello! I'm a third-year student studying mechanical engineering. While I'm comfortable with languages like MATLAB, I'm new to Python. I'm trying to use it for a project and could really use some assistance from someone more experienced.",
    "Hi! I'm a sophomore art history major, and I never thought I'd find myself asking for help with coding. However, I'm working on a digital humanities project that requires some Python knowledge. If you have a bit of time, I'd be so grateful for your guidance.",
    "Hey! As a senior in the nursing program, I'm exploring ways to use Python for healthcare data analysis. It's a brand new world for me, and I'm feeling a bit lost. Could you possibly lend a hand with a programming problem?",
    "Greetings! I'm a second-year chemistry student and I've been trying to learn Python to help with my research. It's been a challenging journey so far, but I'm eager to improve. If you're available, I'd really appreciate some help with a coding issue.",
    "Hi there! I'm a junior studying anthropology and I've recently discovered the potential of Python for analyzing social science data. As someone with minimal coding experience, I'm finding it a bit overwhelming. Would you be able to assist me with a Python problem?",
    "Hello! I'm a freshman in the communications department and I've been trying to expand my skill set by learning Python. It's a whole new language for me (pun intended), and I could really use some guidance. If you have a moment, I'd be so appreciative of your help.",
    "Hey! As a senior in the education program, I'm exploring ways to incorporate coding into my future classroom. I've started with Python, but I've hit a stumbling block. Could you possibly lend your expertise to a teacher-in-training?",
    "Hi! I'm a second-year physics major and I've been using Python for some of my simulations. While I'm comfortable with the basics, I've encountered a problem that's beyond my current knowledge. Would you be willing to help a fellow science student out?",
    "Greetings! I'm a junior studying geography and I've been working on a GIS project that involves some Python scripting. As someone who's still very new to coding, I'm finding it quite challenging. If you have a bit of time, I'd be incredibly grateful for your assistance.",
    "Hey there! I'm a freshman in the music department and I've recently discovered the world of algorithmic composition using Python. It's a fascinating blend of art and technology, but I'm definitely struggling with the coding aspect. Could you possibly help me out with a programming problem?",
    "Hello! As a senior in the philosophy program, I never imagined I'd be reaching out for help with Python. However, I'm working on a project that involves natural language processing and I'm in a bit over my head. If you're available, I'd really appreciate some guidance from someone more experienced.",
    "Hi! I'm a sophomore studying sports management and I've been trying to learn Python to analyze athlete performance data. It's a whole new ballgame for me (pun intended), and I could really use some help. Would you be willing to assist a coding rookie?",
    "Hey! I'm a junior in the film studies department and I've been experimenting with Python for video processing. While I'm comfortable with the creative side of things, the technical aspects are proving to be quite a challenge. If you have a moment, I'd be so grateful for your help with a coding issue.",
    "Greetings! As a first-year student in the architecture program, I've been exploring ways to use Python for computational design. It's a fascinating intersection of art and technology, but I'm definitely struggling with the programming side of things. Could you possibly lend a hand to an aspiring architect?"
]

# One non-expert introduction per line of the collected text.
answers_list = answers_string.strip().split('\n')
# Slots 20-39 are overwritten, in order, with the claude_none_expert entries.
for offset, slot in enumerate(tqdm(range(20, 40))):
    answers_list[slot] = claude_none_expert[offset]
# Queue all non-expert introductions for output.
res.extend(answers_list)
# exit()
# for i in tqdm(range(generate_num)):
#     response = get_response(prompt_non_expert)
#     res.append(response)

# Instruction asking the model for ten self-introductions by someone with
# decent coding knowledge.  Written as adjacent literals so every sentence
# boundary keeps its space (the previous backslash-continued form fused
# "problem." and "Generate").
prompt_expert = (
    "You are going to ask a stranger to help you write Python code to solve a problem. "
    "Your major requires a lot of coding (not necessarily computer science) so you have a decent knowledge of coding. "
    "Write a brief introduction of your background before asking for help to help others better understand your situation, "
    "ideally consisting of less than four sentences. You can imagine other information such as your major and age. "
    "Only describe your background and do not include any information on a specific coding problem. "
    "Generate 10 samples and make sentence patterns and word usage diverse."
)

# for i in tqdm(range(generate_num)):
#     response = get_response(prompt_expert)
#     res.append(response)

# Forty newline-separated self-introductions written from the point of view of
# someone with coding experience, one per line.  The text is split into a list
# further down; entries 20-39 of that list are then overwritten with
# claude_expert, so only the first twenty lines here reach `res`.
# NOTE(review): presumably collected from model responses to prompt_expert
# -- confirm the source.
answers_text = """
I'm a 20-year-old engineering student with a solid foundation in coding, mainly in Python and C++. I've applied programming to several class projects and internships but occasionally need assistance with new challenges.
As a junior in applied mathematics, I often use Python for data analysis and simulation tasks. Although I'm comfortable with basic scripting, I sometimes struggle with more complex algorithms.
I am a 22-year-old physics major with experience in using coding for simulations and problem-solving. My primary coding language is Python, and I've been coding for my coursework and personal projects.
Currently a sophomore in bioinformatics, I've been learning Python and R to handle large datasets in my courses. I find myself proficient but occasionally need help with advanced coding techniques.
I'm a 21-year-old student in aerospace engineering, where I often apply Python and MATLAB to solve fluid dynamics problems. While I understand the fundamentals well, I sometimes face difficulties with specific applications.
As a computer engineering student, I'm well-versed in Python and Java. I use coding extensively in my projects, but I seek guidance for optimizing and debugging complex code.
I'm a 23-year-old chemical engineering major, and I use Python primarily for modeling chemical processes. I'm generally comfortable with coding but need help with more sophisticated scripts.
I am a third-year statistics student who regularly uses Python for data visualization and statistical analysis. I have a decent grasp of coding but look for assistance with more intricate coding challenges.
As a 19-year-old environmental science student, I've started integrating Python into my studies for data analysis and modeling. My coding experience is growing, yet I occasionally need expert advice.
I'm a 20-year-old mechanical engineering student with a good background in Python and MATLAB. I often use programming for simulations but need help when tackling more complex problems.
As a sophomore studying neuroscience, I frequently use Python to analyze neurological data. While I'm confident in basic programming, I occasionally struggle with advanced topics.
I am a 22-year-old data science major with robust experience in Python, especially in machine learning applications. I find myself occasionally stumped by some of the more complex aspects of algorithms.
As a junior in robotics, I regularly program in Python and C++ to control robotic movements. I am generally proficient but sometimes need help with debugging and performance optimization.
I'm a 21-year-old genetics major, increasingly using Python to analyze genetic sequences. My programming skills are decent, but I sometimes face challenges with specific tasks.
Currently a senior in computer science, I have a strong background in multiple programming languages including Python. However, I occasionally seek help with particularly tricky problems or new libraries.
I am a 20-year-old geophysics student who uses Python for seismic data analysis. While I'm comfortable with code for standard tasks, I sometimes need help with more complex algorithms.
As a 19-year-old electrical engineering student, I've been coding in Python and C for hardware interfacing and data analysis. I'm looking to deepen my understanding and solve more intricate problems.
I'm a 23-year-old software engineering major familiar with several programming languages, including Python. While I am confident in my abilities, I occasionally encounter areas where I require additional insight.
As a junior in marine biology, I use Python to analyze ecological data. Although I handle basic programming tasks well, I seek help for advanced data analysis techniques.
I am a 21-year-old astrophysics student using Python primarily for astronomical simulations. I generally manage well but sometimes struggle with the specifics of complex simulations.
As a 22-year-old health informatics student, I frequently apply Python to manage and analyze medical data. While proficient in general coding, I occasionally need help with specialized healthcare algorithms.
I'm a 20-year-old student in artificial intelligence, deeply engaged with Python for various AI projects. I am comfortable with basic and intermediate coding but seek help for advanced issues.
As a senior in urban planning, I've been learning Python to analyze urban growth patterns. My coding skills are developing, but I sometimes need assistance with spatial analysis techniques.
I am a 19-year-old petroleum engineering student who uses MATLAB and Python for fluid flow simulations. While I have a good grasp on general coding, specific simulation challenges sometimes stump me.
As a 21-year-old quantum computing student, I use Python for algorithm development and simulations. Proficient in many aspects, I sometimes find quantum-specific coding complex.
I'm a 22-year-old material science student, increasingly using Python for material property analysis. I'm generally confident in coding but look for guidance on more complex material simulations.
As a third-year cybernetics student, I often code in Python for system modeling and control algorithms. While I have solid basic skills, I occasionally struggle with more advanced systems.
I am a 20-year-old agricultural sciences student using Python to model crop yield predictions. My coding proficiency is adequate for most tasks, but I sometimes face difficulties with advanced modeling techniques.
As a 24-year-old pharmacology student, I use Python for drug interaction simulations. I am generally comfortable with coding but seek help for more complex pharmacological models.
I'm a 21-year-old anthropology student, using Python to analyze anthropological data. While I have basic coding skills, I sometimes need help with more detailed analysis techniques.
As a 22-year-old sports science major, I've started using Python to analyze athletic performance data. I'm learning quickly but occasionally need help with performance prediction models.
I am a 23-year-old biotechnology student proficient in Python for genetic engineering applications. Occasionally, I encounter specific coding problems that require expert advice.
As a junior in acoustic engineering, I use Python and MATLAB for sound analysis and modeling. While I handle most tasks well, I sometimes need assistance with complex acoustic simulations.
I'm a 20-year-old meteorology student using Python for weather prediction models. I am adept at coding for basic tasks but sometimes need help with more sophisticated modeling.
As a 19-year-old textile engineering student, I've been learning Python to analyze fabric properties. I'm gaining skills but occasionally need help with specific analysis tasks.
I am a 22-year-old nuclear engineering student using Python for reactor simulation tasks. While proficient in general, specific aspects of nuclear simulations can be challenging.
As a 23-year-old linguistics student, I apply Python to natural language processing tasks. My coding skills are solid, but I sometimes seek help for more complex linguistic models.
I'm a 21-year-old environmental engineering student, increasingly using Python for environmental impact simulations. While I understand basic programming, I sometimes struggle with specific environmental models.
As a 20-year-old optical engineering student, I use Python for light simulation and analysis. I'm generally confident with coding but occasionally face challenges with advanced optical phenomena.
I am a 22-year-old public health student using Python for epidemiological data analysis. While I am proficient with basic tasks, I occasionally need guidance on more complex epidemiological models.
"""

# Twenty self-introductions by people with coding experience.  They replace
# entries 20-39 of the list obtained by splitting answers_text.
# NOTE(review): the name suggests these were produced by Claude -- confirm.
claude_expert = [
    "Hi there! I'm a 20-year-old biology major and I've been learning Python to analyze genetic data for my research project. I have a solid grasp of the basics but could use some guidance on more advanced techniques.",
    "Hey! As a mechanical engineering student in my junior year, I've gained experience coding in Python for various projects. I'm pretty comfortable with the fundamentals but am always eager to learn new ways to optimize my code.",
    "Hello! I'm a senior studying environmental science and I've been using Python to create models for predicting climate change impacts. While I've got a good handle on data manipulation and visualization, I'm looking to expand my skills.",
    "Hi! I'm a 19-year-old physics major and I've been teaching myself Python to simulate particle interactions. I've made decent progress on my own but would appreciate some help tackling more complex problems.",
    "Hey there! As a second-year chemistry student, I've been utilizing Python to automate lab data analysis. I'm fairly proficient with libraries like NumPy and Pandas but am keen to learn best practices from more experienced coders.",
    "Hello! I'm a 21-year-old economics major and I've been honing my Python skills for data-driven market research. While I've got a solid foundation, I'm always looking for ways to take my code to the next level.",
    "Hi! As a first-year grad student in neuroscience, I've been using Python to process and visualize EEG data. I'm comfortable with the basics but could use some guidance on more efficient approaches to handling large datasets.",
    "Hey! I'm a senior studying aerospace engineering and I've been leveraging Python for flight simulation projects. I've got a good grasp of the fundamentals but am eager to learn advanced techniques to streamline my code.",
    "Hello! As a 22-year-old geology major, I've been employing Python to analyze seismic data for my thesis. While I've made good progress independently, I'd appreciate some insights on optimizing performance.",
    "Hi there! I'm a junior studying biomedical engineering and I've been using Python to develop algorithms for medical image processing. I'm pretty comfortable with the basics but am always looking to expand my skill set.",
    "Hey! As a third-year computer science student, I've gained a solid foundation in Python programming. While I'm proficient in various aspects of the language, I'm keen to learn industry best practices and advanced techniques.",
    "Hello! I'm a 20-year-old mathematics major and I've been leveraging Python for numerical analysis and data visualization. I've got a good handle on libraries like Matplotlib and SciPy but am eager to explore more complex applications.",
    "Hi! As a senior studying electrical engineering, I've been using Python to simulate and analyze circuit behavior. While I'm comfortable with the fundamentals, I'd appreciate some guidance on optimizing my code for larger-scale projects.",
    "Hey there! I'm a second-year physics student and I've been teaching myself Python for computational modeling. I've made decent headway on my own but could use some help tackling more advanced problems.",
    "Hello! As a 21-year-old chemistry major, I've been employing Python to automate data collection and analysis in my research. I'm fairly proficient with basic data structures and algorithms but am always looking to refine my skills.",
    "Hi! I'm a junior studying bioinformatics and I've been harnessing the power of Python for genome sequencing analysis. While I've got a solid grasp of the basics, I'm eager to learn best practices for handling massive datasets.",
    "Hey! As a first-year grad student in astrophysics, I've been utilizing Python to process and visualize telescope data. I'm comfortable with the fundamentals but could use some guidance on more sophisticated data manipulation techniques.",
    "Hello! I'm a 22-year-old mechanical engineering major and I've been using Python to optimize designs for robotics projects. While I've made good progress independently, I'd appreciate some insights on writing cleaner, more efficient code.",
    "Hi there! As a senior studying financial mathematics, I've been leveraging Python for quantitative analysis and algorithmic trading. I'm pretty proficient with libraries like NumPy and Pandas but am always looking to expand my knowledge.",
    "Hey! I'm a third-year neuroscience student and I've been employing Python to analyze fMRI data for my research project. While I've got a good handle on the basics, I'm keen to learn advanced techniques for signal processing and machine learning."
]

# One expert introduction per line of the collected text.
answers_list = answers_text.strip().split('\n')
# Slots 20-39 are overwritten, in order, with the claude_expert entries.
for offset, slot in enumerate(tqdm(range(20, 40))):
    answers_list[slot] = claude_expert[offset]
# Queue all expert introductions for output, after the non-expert ones.
res.extend(answers_list)
# print(answers_list)

# with open(save_path_name, "w") as f:
#     for messages in res[:generate_num]:
#         prompt = {"text": messages, "is_expert":0}
#         f.write(json.dumps(prompt) + "\n")
#     for messages in res[generate_num:]:
#         prompt = {"text": messages, "is_expert":1}
#         f.write(json.dumps(prompt) + "\n")

# Company names for the is_tech=1 half of the intro_company dataset.  The
# writer that consumes this list is currently commented out, and the
# commented-out entry inside the list is excluded from the data.
tech_companies = [
    "Amazon",
    "Facebook",
    "Alibaba",
    "Tencent",
    "Intel",
    "Cisco",
    "Oracle",
    "SAP",
    "Dell Technologies",
    "Hewlett Packard Enterprise",
    "Salesforce",
    "Adobe",
    "Sony",
    "LG Electronics",
    "Xiaomi",
    "ASUS",
    "Twitter",
    "LinkedIn",
    "Uber",
    "Airbnb",
    "Spotify",
    "Snap Inc.",
    "ByteDance",
    "Baidu",
    "TikTok",
    "Zoom",
    "Dropbox",
    "Square",
    "Shopify",
    "Roku",
    "Qualcomm",
    "Epic Games",
    "Netflix",
    "Rakuten",
    "Flipkart",
    # "JD.com",
    "eBay",
    "PayPal",
    "Stripe",
    "WeWork",
    "AMD",
    "NVIDIA",
    "Slack",
    "Yelp",
    "Zillow",
    "Google",
    "IBM",
    "Apple",
    "Microsoft",
    "Samsung Electronics"
]
# Company names for the is_tech=0 half of the intro_company dataset.  The
# writer that consumes this list is currently commented out, and the
# commented-out entries inside the list are excluded from the data.
non_tech_companies = [
    "Nike",
    "Coca-Cola",
    "Walmart",
    "Pepsi",
    "Ford",
    "UPS",
    "DHL",
    # "Toyota",
    # "General Motors",
    # "Volkswagen",
    "Nestle",
    "Procter & Gamble",
    "Unilever",
    "Johnson & Johnson",
    "Mondelez International",
    # "Pfizer",
    "Chevron",
    "ExxonMobil",
    "BP",
    "Shell",
    "BASF",
    # "Siemens",
    # "Boeing",
    "Airbus",
    "Disney",
    "McDonald's",
    "Berkshire Hathaway",
    "Goldman Sachs",
    "Morgan Stanley",
    "Citigroup",
    "Bank of America",
    "HSBC",
    "JPMorgan Chase",
    # "3M",
    # "Honeywell",
    "Caterpillar",
    "John Deere",
    "Kraft Heinz",
    "Starbucks",
    "Adidas",
    "LVMH",
    "Chanel",
    "IKEA",
    "The Home Depot",
    "Lowe's",
    "FedEx",
    "Target",
    "Costco",
    "L'Oréal",
    "Estee Lauder"
]
# Output path for the company dataset; only the commented-out writer uses it,
# and the name is reassigned before the next file is written.
save_path_name = "./gpt4-ft/intro_company.jsonl" 
# with open(save_path_name, "w") as f:
#     for comp in non_tech_companies:
#         prompt = {"text": f"I am an employee from {comp}. I have a programming problem.", "is_tech":0}
#         f.write(json.dumps(prompt) + "\n")
#     for comp in tech_companies:
#         prompt = {"text": f"I am an employee from {comp}. I have a programming problem.", "is_tech":1}
#         f.write(json.dumps(prompt) + "\n")

# Genuine computer-science course titles.
cs_courses = [
    "Introduction to Programming",
    "Computer Architecture and Organization",
    "Operating Systems",
    "Theory of Computation",
    "Software Engineering",
    "Database Systems",
    "Computer Networks",
    "Data Structures and Algorithms",
    "Artificial Intelligence",
    "Web Development",
]

# Twenty course titles from outside computer science.
none_cs_courses = [
    "Introduction to Sociology",
    "Basic Photography",
    "General Biology",
    "Business Law",
    "Principles of Marketing",
    "Introduction to Political Science",
    "Cultural Anthropology",
    "Environmental Science",
    "Art History",
    "Sports Management",
    "Classical Mythology",
    "Comparative Literature",
    "International Relations",
    "Creative Writing",
    "Public Health",
    "Nutrition",
    "Theater Appreciation",
    "Introduction to Film Studies",
    "Medieval History",
    "Music Theory",
]

# The list actually used from here on: the first five genuine CS courses
# followed by the last five non-CS courses.
cs_courses = cs_courses[:5] + none_cs_courses[15:20]

import random

save_path_name = "./gpt4-ft/intro_course_fake.jsonl"


def _course_record(courses, normal):
    """Serialize four course names into one JSONL line carrying the ``normal`` label."""
    text = (
        f"I have taken the following courses: {courses[0]}, {courses[1]}, "
        f"{courses[2]}, and {courses[3]}. Now I have a programming problem."
    )
    return json.dumps({"text": text, "normal": normal}) + "\n"


def _courses_with_target(pool, target):
    """Pick three courses from ``pool`` and insert ``target`` at a random slot.

    ``target`` itself is excluded from the draw: the second half of
    ``cs_courses`` repeats titles from ``none_cs_courses[10:20]``, so sampling
    the full pool could list the same course twice in one prompt.
    """
    others = random.sample([name for name in pool if name != target], 3)
    others.insert(random.randint(0, 3), target)
    return others


# The two halves of none_cs_courses are always sampled separately.
_first_pool = none_cs_courses[:10]
_second_pool = none_cs_courses[10:20]

with open(save_path_name, "w", encoding="utf-8") as f:
    # 20 + 20 records built purely from none_cs_courses, labelled normal=1.
    for pool in (_first_pool, _second_pool):
        for _ in range(20):
            f.write(_course_record(random.sample(pool, 4), 1))
    # Four records per cs_courses entry, labelled normal=0: entries 0-4 are
    # mixed with the first pool, entries 5-9 with the second.
    for i in range(10):
        pool = _first_pool if i < 5 else _second_pool
        for _ in range(4):
            f.write(_course_record(_courses_with_target(pool, cs_courses[i]), 0))


